library(tidyverse)
library(patchwork)
library(wesanderson)
library(scales)
library(tidytext)
library(wordcloud)

setwd("nval/palisr-june2024")


## DATA LOADING AND PREP ##

# Read the CCC export and tidy three columns, one step per column:
#   date      - converted with lubridate::date()
#   fips_code - values that are 4 characters long get a leading "0"
#   issues    - empty strings are relabeled "other"
# Rows whose converted date is NA are dropped at the end.
ccc <- read.csv("ccc-20240613.csv") %>%
  mutate(date = lubridate::date(date)) %>%
  mutate(fips_code = ifelse(nchar(fips_code) == 4,
                            paste0("0", fips_code),
                            fips_code)) %>%
  mutate(issues = ifelse(issues == "", "other", issues)) %>%
  filter(!is.na(date))

# inclusive upper bound of the date window used when subsetting events below
edge_date <- lubridate::date("2024-06-07")

# regex for schools
#
# Each element of `schools` is one alternative; they are joined with "|" into
# `regex_schools`, which is later matched against `location_detail` with
# perl = TRUE (required for the (?! ...) negative lookaheads) and
# ignore.case = TRUE.
#
# - "college", "university" and "school" are skipped when directly followed by
#   a space and one of the listed words (street-type suffixes such as
#   "ave"/"blvd", plus terms like "hospital" or "plaza").
# - "\\b(?:c|s)uny\\b" matches "cuny" or "suny" as whole words; "\\buc\\b"
#   matches "uc" as a whole word unless followed by " theatre".
#
# NOTE(review): "st(reet)?" carries a trailing \\b only in the "school"
# lookahead; in the "college" and "university" lookaheads it is unanchored, so
# text such as "college station" or "university stadium" is also rejected --
# confirm that is intended.
# NOTE(review): in "\\bpoly(...)?" everything after "poly" is optional, so the
# alternative matches any word that merely starts with "poly" -- confirm.

schools <- c("college(?! (?:st(reet)?|ave(nue)?|r(oa)?d|cir(cle)?|dr(ive)?\\b|blvd|heights|point|green|athletic))",
             "university(?! (?:st(reet)?|ave(nue)?|r(oa)?d|cir(cle)?|dr(ive)?\\b|blvd|heights|city|behavioral|hospital|plaza|lakes|office|irving))",
             "school(?! (?:st(reet)?\\b|ave(nue)?|r(oa)?d|cir(cle)?|dr(ive)?\\b|blvd|heights))",
             "\\bcooper union",
             "institute of technology",
             "\\bpoly(technic (state )?(?:institute|university))?",
             "auraria campus",
             "pentacrest",
             "(?:naval|air force|military) academy|west point(?! hwy)",
             "\\b(?:c|s)uny\\b",
             "\\buc\\b(?! theatre)")

# single alternation of all the patterns above
regex_schools <- paste(schools, collapse = "|")

# pro-pal

# claim phrases that put an event in the pro-Palestine set
regex_pal_claims <- "for palestinian (?:liberation|rights)|free palestine(?! from hamas)|in solidarity with (?:palestin|gaza)|in remembrance of palestin|ceasefire in gaza|against apartheid in israel|(?:for ending|against) israel's occupation of palestin|genocide of palestin"

# Events from 2023-10-07 through edge_date whose claims match the phrases above
# and do not mention solidarity with Israel, plus four 0/1 indicator columns:
#   schools          - location_detail matches regex_schools
#   encampment       - participant_measures mentions "encamp"
#   counter          - type mentions counter-protest (stored as integer)
#   counterprotested - macroevent is not NA
pal <- ccc %>%
  filter(date >= "2023-10-07", date <= edge_date) %>%
  filter(
    grepl(regex_pal_claims, claims, ignore.case = TRUE, perl = TRUE),
    !grepl("in solidarity with Israel", claims, ignore.case = TRUE)
  ) %>%
  mutate(
    schools = ifelse(
      grepl(regex_schools, location_detail, ignore.case = TRUE, perl = TRUE),
      1, 0
    ),
    encampment = ifelse(
      grepl("encamp", participant_measures, ignore.case = TRUE),
      1, 0
    ),
    counter = as.integer(grepl("counter.?protest", type, ignore.case = TRUE)),
    counterprotested = ifelse(!is.na(macroevent), 1, 0)
  )

print(nrow(pal))

# pro-isr

# claim phrases that put an event in the pro-Israel set ...
regex_isr_claims <- "against violent attack on israel|(?:in|for) (?:solidarity with|support of) israel|stand with israel"
# ... and phrases that remove it again
regex_isr_exclude <- "for palestinian liberation|palestine will be free|free free Palestine"

# Events from 2023-10-07 through edge_date whose claims match the include
# phrases and none of the exclude phrases, plus four 0/1 indicator columns:
#   schools          - location_detail matches regex_schools
#   encampment       - participant_measures mentions "encamp"
#   counter          - type mentions counter-protest (stored as integer)
#   counterprotested - macroevent is not NA
isr <- ccc %>%
  filter(date >= "2023-10-07", date <= edge_date) %>%
  filter(
    grepl(regex_isr_claims, claims, perl = TRUE, ignore.case = TRUE),
    !grepl(regex_isr_exclude, claims, perl = TRUE, ignore.case = TRUE)
  ) %>%
  mutate(
    schools = ifelse(
      grepl(regex_schools, location_detail, ignore.case = TRUE, perl = TRUE),
      1, 0
    ),
    encampment = ifelse(
      grepl("encamp", participant_measures, ignore.case = TRUE),
      1, 0
    ),
    counter = as.integer(grepl("counter.?protest", type, ignore.case = TRUE)),
    counterprotested = ifelse(!is.na(macroevent), 1, 0)
  )

print(nrow(isr))

## STATS ##

# how many pro-Pal events?
nrow(pal)
# how many participants? (totals of the size_mean, size_low and size_high
# columns, ignoring missing values)
sum(pal$size_mean, na.rm = TRUE)
sum(pal$size_low, na.rm = TRUE)
sum(pal$size_high, na.rm = TRUE)
# number of events with a non-missing size_mean, then that number as a
# percentage of all pro-Pal events
sum(!is.na(pal$size_mean))
100 * (sum(!is.na(pal$size_mean)) / nrow(pal))

# how many pro-Isr events?
nrow(isr)
# how many participants? (totals of the size_mean, size_low and size_high
# columns, ignoring missing values)
sum(isr$size_mean, na.rm = TRUE)
sum(isr$size_low, na.rm = TRUE)
sum(isr$size_high, na.rm = TRUE)
# number of events with a non-missing size_mean, then that number as a
# percentage of all pro-Isr events
sum(!is.na(isr$size_mean))
100 * (sum(!is.na(isr$size_mean)) / nrow(isr))

# how many pro-Isr in 1st four weeks?
nrow(filter(isr, date <= "2023-11-07"))

# how many pro-Isr are RFTL?
# (events whose organizations field mentions "run for their lives";
# the match is computed once, then reported as a count and as a share)
isr_rftl <- grepl("run for their lives", isr$organizations, ignore.case = TRUE)
sum(isr_rftl)
sum(isr_rftl) / nrow(isr)

# how many pro-Isr are counters? (count and share; then the same for pro-Pal)
sum(isr$counter)
sum(isr$counter) / nrow(isr)
sum(pal$counter)
sum(pal$counter) / nrow(pal)

# how many pro-Pal at schools?
# Printed in order: school events, all events, and the school share --
# first for dates up to and including 2024-04-17, then for later dates.
pal %>% filter(schools == 1, date <= "2024-04-17") %>% nrow()
pal %>% filter(date <= "2024-04-17") %>% nrow()
nrow(filter(pal, schools == 1, date <= "2024-04-17")) /
  nrow(filter(pal, date <= "2024-04-17"))
pal %>% filter(schools == 1, date > "2024-04-17") %>% nrow()
pal %>% filter(date > "2024-04-17") %>% nrow()
nrow(filter(pal, schools == 1, date > "2024-04-17")) /
  nrow(filter(pal, date > "2024-04-17"))

# pro-Pal arrests?
# coerce to integer so arrests can be summed (the arrests chart further down
# sums this column as well)
pal$arrests <- as.integer(pal$arrests)
sum(pal$arrests, na.rm = TRUE)
# arrests at events not flagged as schools: total, then share of all arrests
with(filter(pal, schools == 0), sum(arrests, na.rm = TRUE))
with(filter(pal, schools == 0), sum(arrests, na.rm = TRUE)) /
  sum(pal$arrests, na.rm = TRUE)

# 'violence' in general?
# na.rm = TRUE matches the other totals in this section; without it a single
# missing value would turn the whole sum into NA
sum(pal$property_damage_any, na.rm = TRUE)
sum(pal$injuries_police_any, na.rm = TRUE)
# 'violence' on campuses?
nrow(filter(pal, schools == 1))
with(filter(pal, schools == 1), sum(property_damage_any, na.rm = TRUE))
with(filter(pal, schools == 1), sum(injuries_police_any, na.rm = TRUE))


## CHARTS ##

# daily counts: pro-Palestine
# Two stacked panels -- events per day on top, summed size_mean per day below --
# written to a 7 x 5 inch PNG at 300 dpi.

pal_daily_events_chart <- pal %>%
  group_by(date) %>%
  tally() %>%
  ungroup() %>%
  ggplot(aes(date, n)) +
    geom_col() +
    theme_minimal() +
    theme(axis.title = element_blank(),
          panel.grid.minor = element_blank())

pal_daily_crowds_chart <- pal %>%
  group_by(date) %>%
  summarize(n = sum(size_mean, na.rm = TRUE)) %>%
  ggplot(aes(date, n)) +
    geom_col() +
    theme_minimal() +
    scale_y_continuous(labels = comma) +
    theme(axis.title = element_blank(),
          panel.grid.minor = element_blank())

# patchwork: "/" stacks the panels, "&" applies the theme to every panel;
# the tag_levels strings label each panel on its right-hand side
pal_daily_chart <- pal_daily_events_chart / pal_daily_crowds_chart +
  plot_annotation(title = "Daily tallies for U.S. pro-Palestine protests",
                  subtitle = "October 7, 2023\u2013June 7, 2024",
                  caption = "Source: Crowd Counting Consortium",
                  tag_levels = list(c("count of\nevents",
                                      "estimated\nparticipants"
                                      ))
                  ) & theme(
                  plot.tag = element_text(size = 10),
                  plot.tag.position = "right"
                  )

png("palisr-june2024-daily-counts-pal.png", res = 300, width = 7, height = 5, units = "in")
# explicit print(): ggplot/patchwork objects are drawn only when printed, and
# top-level auto-printing does not happen when this script is run via source()
print(pal_daily_chart)
dev.off()

# daily event counts: pro-Israel
# Two stacked panels -- events per day on top, summed size_mean per day below --
# written to a 7 x 5 inch PNG at 300 dpi.

isr_daily_events_chart <- isr %>%
  group_by(date) %>%
  tally() %>%
  ungroup() %>%
  ggplot(aes(date, n)) +
    geom_col() +
    theme_minimal() +
    theme(axis.title = element_blank(),
          panel.grid.minor = element_blank())

isr_daily_crowds_chart <- isr %>%
  group_by(date) %>%
  summarize(n = sum(size_mean, na.rm = TRUE)) %>%
  ggplot(aes(date, n)) +
    geom_col() +
    theme_minimal() +
    scale_y_continuous(labels = comma) +
    theme(axis.title = element_blank(),
          panel.grid.minor = element_blank())

# patchwork: "/" stacks the panels, "&" applies the theme to every panel;
# the tag_levels strings label each panel on its right-hand side
isr_daily_chart <- isr_daily_events_chart / isr_daily_crowds_chart +
  plot_annotation(title = "Daily tallies for U.S. pro-Israel protests",
                  subtitle = "October 7, 2023\u2013June 7, 2024",
                  caption = "Source: Crowd Counting Consortium",
                  tag_levels = list(c("count of\nevents",
                                      "estimated\nparticipants"
                                      ))
                  ) & theme(
                  plot.tag = element_text(size = 10),
                  plot.tag.position = "right"
                  )

png("palisr-june2024-daily-counts-isr.png", res = 300, width = 7, height = 5, units = "in")
# explicit print(): ggplot/patchwork objects are drawn only when printed, and
# top-level auto-printing does not happen when this script is run via source()
print(isr_daily_chart)
dev.off()

# arrests by action type
# Daily arrest totals at pro-Palestine events, stacked by whether the event's
# `type` contains "direct action"; written to an 8.5 x 4.5 inch PNG at 300 dpi.
# Sums pal$arrests, so that column must already be numeric.

arrests_by_action_chart <- pal %>%
  mutate(da = ifelse(grepl("direct action", type), "yes", "no")) %>%
  group_by(date, da) %>%
  summarize(n = sum(arrests, na.rm = TRUE), .groups = "drop") %>%
# mutate(da = fct_relevel(da, "yes", "no")) %>%
  ggplot(aes(date, n, fill = da)) +
    geom_col() +
    theme_minimal() +
  scale_y_continuous(position = "left", labels = comma) +
  scale_x_date(date_labels = "%b %d, %Y") +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  labs(title = "Daily arrest totals at U.S. pro-Palestinian protests",
       subtitle = "October 7, 2023\u2013June 7, 2024",
       caption = "Source: Crowd Counting Consortium") +
  scale_fill_manual(values = wes_palette("Royal1")[c(1,2)],
                    name = "civil disobedience\nor direct action?")

png("palisr-june2024-arrests-by-action-type.png", res = 300, width = 17/2, height = 9/2, units = "in")
# explicit print(): ggplot objects are drawn only when printed, and top-level
# auto-printing does not happen when this script is run via source()
print(arrests_by_action_chart)
dev.off()

# word cloud of verbatim claims

# Claims starting with "for ", "against " or "in <word> of/with" are excluded
# below; presumably these are summary-style claims rather than verbatim
# slogans -- confirm against the CCC codebook.
regex_claimsum <- "^(?:for|against) |^in [[:alpha:]]{3,} (?:of|with)"

# get vector of verbatim claims
# (split each claims string on commas, trim whitespace, drop the summary-style
# entries and anything ending in "flag")
pal_claims <- str_trim(unlist(str_split(pal$claims, ",")))
pal_claims_verbatim <- pal_claims[!grepl(regex_claimsum, pal_claims, ignore.case = TRUE)]
pal_claims_verbatim <- pal_claims_verbatim[!grepl("flag$", pal_claims_verbatim, perl = TRUE, ignore.case = TRUE)]
# normalize "cease fire"/"cease-fire" to one token; case-insensitive because
# this runs before unnest_tokens() lowercases the text
pal_claims_verbatim <- str_replace_all(pal_claims_verbatim,
                                       regex("cease.fire", ignore_case = TRUE),
                                       "ceasefire")

# one row per word with its frequency n, most frequent first
pal_words <- data.frame(claims = pal_claims_verbatim) %>%
    unnest_tokens(word, claims) %>%
    # remove stop words (explicit key avoids the "Joining with" message)
    anti_join(stop_words, by = "word") %>%
    # other cleanup: strip possessive 's, a leading "anti", and punctuation
    mutate(word = gsub("\'s|\\banti|[[:punct:]]", "", word)) %>%
    filter(word != "strikethrough") %>%  # used to interpret poster imagery, not in slogans
    filter(!is.na(word)) %>%
    filter(nchar(word) > 2) %>%
    # convert pluralized words to singular
    # mutate(word = singularize(word)) %>%
    group_by(word) %>%
    tally() %>%
    arrange(-n)

pal_cloud_title <- "Relative frequencies of words in claims observed\nat U.S. pro-Palestine protests"

png("palisr-june2024-wordcloud.png", res = 300, width = 9, height = 9, units = "in")
# two stacked regions: a short one for the title, a tall one for the cloud
layout(matrix(c(1, 2), nrow=2), heights=c(1,5))
par(mar=rep(0, 4))
plot.new()
text(x=0.5, y=0.5, pal_cloud_title, cex = 1.5)
# wordcloud() places words using random numbers; fix the seed so the figure
# is reproducible from run to run
set.seed(20240613)
with(pal_words, wordcloud(word, n,
                          min.freq = 10,
                          random.order = FALSE,
                          scale = c(5,1/2)))
dev.off()

# head() rather than [1:6, ] so fewer than six words does not add NA rows
print(head(pal_words, 6))

length(pal_claims_verbatim)
nrow(pal_words)



